########################################################################################
# Script: Streng naturalisatiebeleid ontmoedigt immigranten uit laagontwikkelde landen #
# Floris Peters, Maarten Vink & Hans Schmeets                                          #
# Economisch Statistische Berichten                                                    #
########################################################################################


################
# Introduction #
################

# This file provides the syntax used to create all the tables and figures in the paper. 
# The dataset contains sensitive, micro level information. As such, for privacy reasons the data is only available to individuals employed at or affiliated to Statistics Netherlands. 
# The dataset can be found at the following location on the network of Statistics Netherlands: \\cbsp.nl\Productie\Projecten\SAL\209253UM_FP_SEC1\Werk\Floris\PhD\ESB_naturalisation 

#######################################################################################################################
#######################################################################################################################

#############
# Variables #
#############

# ID
# (Individual identification number)

# START
# (Time vector - start of a given time period)

# STOP
# (Time vector - end of a given time period)

# EVENT
# (Acquiring Dutch citizenship)
# [0] The event does not occur during this time period; 
# [1] The event occurs at the end of this time period

# COHORT
# (Year of first immigration to the Netherlands)

# GENDER
# [0] Female; 
# [1] Male

# AGEARRIVAL
# (Age at the moment of migration in years)

# AGECAT (Age at the moment of migration in categories) 
# [1] 15-17 years; 
# [2] 18-24 years; 
# [3] 25-34 years; 
# [4] 35-44 years; 
# [5] 45-54 years; 
# [6] 55-64 years; 
# [7] 65-74 years; 
# [8] >74 years

# PARTNER
# [1] No partner; 
# [2] Native Dutch partner; 
# [3] Foreign born foreign partner; 
# [4] Year naturalization partner; 
# [5] 1 year after naturalization partner; 
# [6] 2 years after naturalization partner; 
# [7] 3 years after naturalization partner; 
# [8] >3 years after naturalization partner 

# CHILD 
# (Age of the youngest child in the household in years)
# [999] No children in the household

# CHILDREC
# (Children in the household in categories)
# [1] Children <18 in household; 
# [2] no children <18 in household

# DEVELOPMENT
# (Human Development Index (HDI) score origin country)

# DEVCAT
# (Human Development index (HDI) score origin country in categories)
# [1] First quartile; 
# [2] Second quartile; 
# [3] Third quartile; 
# [4] Fourth quartile

#######################################################################################################################
#######################################################################################################################


#Upload the dataset (Data_cit_acq_main.sav) to R and load the necessary libraries
library(splines)
library(foreign)
library(survival)
dataset_main <- read.csv(file.choose(),header=T,sep=";")


###########     
# Table 1 #
###########   

#select last observation in survival structure (i.e. last observation or event)
dataset_main_last_obs <- dataset_main %>% 
  group_by(ID) %>%
  summarise(lastocc = last(YEAR))

#proportion naturalized (last observation per individual) by subgroups
prop.table(dataset_main_last_obs$GENDER,dataset_main_last_obs$EVENT)
prop.table(dataset_main_last_obs$AGECAT,dataset_main_last_obs$EVENT)
prop.table(dataset_main_last_obs$PARTNER,dataset_main_last_obs$EVENT)
prop.table(dataset_main_last_obs$CHILDREC,dataset_main_last_obs$EVENT)
prop.table(dataset_main_last_obs$DEVCAT,dataset_main_last_obs$EVENT)
prop.table(dataset_main_last_obs$EVENT)


############     
# Figure 1 #
############ 
    
#create subgroups by cohort
dataset_early <- subset(dataset_main, COHORT <= 1997)
dataset_mid <- subset(dataset_main, COHORT >= 1998 & COHORT <= 1999)
dataset_late <- subset(dataset_main, COHORT >= 2000)

#compute survival functions
dataset_early$surv_early <- Surv(dataset_early$START, dataset_early$STOP, dataset_early$EVENT)
dataset_mid$surv_mid <- Surv(dataset_mid$START, dataset_mid$STOP, dataset_mid$EVENT)
dataset_late$surv_late <- Surv(dataset_late$START, dataset_late$STOP, dataset_late$EVENT)

#cohort 1995-1997
plot1 <- survfit(surv_early ~ 1)
figure1.1 <- plot(plot1, ylim = c(.2, 1), xlim = c(156, 520), 
                xlab = "Aantal verblijfsweken", ylab = "Proportie niet genaturaliseerd", 
                col = 2, lty = 1)

#retain the previous plot
Par(new=T)

#cohort 1998-1999
plot1.2 <- survfit(surv_mid ~ 1)
figure1.2 <- plot(plot1.2, ylim = c(.2, 1), xlim = c(156, 520), 
                xlab = "Aantal verblijfsweken", ylab = "Proportie niet genaturaliseerd", 
                col = 2, lty = 1)

#retain the previous plot
Par(new=T)

#cohort 2000-2002
plot1.3 <- survfit(surv_late ~ 1)
figure1.3 <- plot(plot1.3, ylim = c(.2, 1), xlim = c(156, 520), 
                  xlab = "Aantal verblijfsweken", ylab = "Proportie niet genaturaliseerd", 
                  col = 2, lty = 1)
legend.figure1 <- c("Cohort 1995-1997", "Cohort 1998-1999", "Cohort 2000-2002")
legend("bottom", legend = legend.figure1, col = c(2,3,4), lty = c(1,1,1))


############     
# Figure 2 #
############ 

#create subgroups by cohort and HDI median
dataset_early_lowHDI <- subset(dataset_main, COHORT <= 1997 & DEVCAT <= 2)
dataset_early_highHDI <- subset(dataset_main, COHORT <= 1997 & DEVCAT >= 2)

#compute the survival functions
dataset_early_lowHDI$surv_early_lowHDI <- Surv(dataset_early_lowHDI$START, dataset_early_lowHDI$STOP, dataset_early_lowHDI$EVENT)
dataset_early_highHDI$surv_early_highHDI <- Surv(dataset_early_highHDI$START, dataset_early_highHDI$STOP, dataset_early_highHDI$EVENT)

#cohort 1995-1997
plot2.1 <- survfit(surv_early_lowHDI ~ 1)
figure2.1 <- plot(plot2.1, ylim = c(.2, 1), xlim = c(156, 520), 
                  xlab = "Aantal verblijfsweken", ylab = "Proportie niet genaturaliseerd", 
                  col = 2, lty = 1)

#retain the previous plot
Par(new=T)

#cohort 2000-2002
plot2.2 <- survfit(surv_early_highHDI ~ 1)
figure12.2 <- plot(plot2.2, ylim = c(.2, 1), xlim = c(156, 520), 
                  xlab = "Aantal verblijfsweken", ylab = "Proportie niet genaturaliseerd", 
                  col = 2, lty = 1)

legend.figure2 <- c("Hoogontwikkeld herkomstland", "Laagontwikkeld herkomstland")
legend("bottom", legend = legend.figure2, col = c(2,3,4), lty = c(1,1,1))


############     
# Figure 3 #
############ 

#create subgroups by cohort and HDI median
dataset_late_lowHDI <- subset(dataset_main, COHORT >= 2000 & DEVCAT <= 2)
dataset_late_highHDI <- subset(dataset_main, COHORT >= 2000 & DEVCAT >= 2)

#compute the survival functions
dataset_late_lowHDI$surv_late_lowHDI <- Surv(dataset_late_lowHDI$START, dataset_late_lowHDI$STOP, dataset_late_lowHDI$EVENT)
dataset_late_highHDI$surv_late_highHDI <- Surv(dataset_late_highHDI$START, dataset_late_highHDI$STOP, dataset_late_highHDI$EVENT)

#cohort 1995-1997
plot3.1 <- survfit(surv_late_lowHDI ~ 1)
figure3.1 <- plot(plot3.1, ylim = c(.2, 1), xlim = c(156, 520), 
                  xlab = "Aantal verblijfsweken", ylab = "Proportie niet genaturaliseerd", 
                  col = 2, lty = 1)

#retain the previous plot
Par(new=T)

#cohort 2000-2002
plot3.2 <- survfit(surv_late_highHDI ~ 1)
figure3.2 <- plot(plot3.2, ylim = c(.2, 1), xlim = c(156, 520), 
                   xlab = "Aantal verblijfsweken", ylab = "Proportie niet genaturaliseerd", 
                   col = 2, lty = 1)

legend.figure3 <- c("Hoogontwikkeld herkomstland", "Laagontwikkeld herkomstland")
legend("bottom", legend = legend.figure3, col = c(2,3,4), lty = c(1,1,1))

